Description of individual datasets

SCMD

  • The SQL query below is used to get the SCMD data from Google BigQuery
  • Individual analyses need to filter for specific vmp_snomed_codes in a subsequent query
# Define SQL query for Secondary Care Medicines Data (SCMD) data 
# Select variables and calculate quantity of medicines grouped by
# year_month, ods_code, vmp_snomed_code, vmp_snomed_name
sql_query_scmd <- dbplyr::sql("SELECT
                               year_month,
                               ods_code,
                               vmp_snomed_code,
                               vmp_product_name,

                               SUM(total_quanity_in_vmp_unit) AS total_quantity
                               
                               FROM ebmdatalab.scmd.scmd
                                   
                               GROUP BY
                               year_month,
                               ods_code,
                               vmp_snomed_code,
                               vmp_product_name")

Overview of the data

# BIG database stuff read from csv instead
# I'm not adding this to github, so needs to be run once to set up
# db_scmd <- dplyr::tbl(conn_ebm_scmd, sql_query_scmd)
# Collect data here
# df_scmd <- db_scmd %>% 
#   collect()
# Write csv do disk
# write_csv(df_scmd, here("data/scmd.csv"))

# Once the commented code above is run, we can use the local csv file
df_scmd <- read_csv(here("data/scmd.csv"),
                    col_types = cols(vmp_snomed_code = col_character()))

# create lookup table to join vmp names to codes
df_scmd_lookup <- df_scmd %>% 
  select(vmp_snomed_code, vmp_product_name) %>% 
  distinct()

# get date range for tables or in text use
scmd_date_range <- range(df_scmd$year_month)
n_ods_codes <- length(unique(df_scmd$ods_code))
n_snomed_codes <- length(unique(df_scmd$vmp_snomed_code))
sum_quantity_positive <- sum(df_scmd$total_quantity[which(df_scmd$total_quantity > 0)])
sum_quantity_negative <- sum(df_scmd$total_quantity[which(df_scmd$total_quantity < 0)])

# calculate total count for each product
df_scmd_distinct <- df_scmd %>% 
  group_by(vmp_snomed_code) %>% 
  summarise(count = sum(total_quantity, na.rm = TRUE)) %>% 
  ungroup() %>% 
  arrange(desc(count)) %>% 
  left_join(df_scmd_lookup)
  • The date range in this notebooks is from 2019-01-01 to 2020-11-01

  • The SCMD dataset has n = 220 unique ODS codes and n = 10870 unique SNOMED codes

  • The sum of all positive quantities is: 38,562,782,582

  • The sum of all negative quantities is: -143,874,967

  • The table below shows the first 100 entries of the SCMD dataset (arranged by vmp_snomed_code) from BigQuery

reactable(head(df_scmd, 100),
          style = list(fontSize = "12px"),
          highlight = TRUE,
          filterable = TRUE)

Frequency of hospitals with data per month

temp_ggplot <- df_scmd %>% 
  select(year_month, ods_code) %>% 
  distinct() %>% 
  group_by(year_month) %>% 
  count() %>% 
  ggplot(aes(x = year_month, 
             y = n)) +
  geom_line(size = 1, alpha = 0.5) +
  geom_point() +
  scale_colour_viridis_d() +
  scale_x_date(date_breaks = "4 month", 
               date_labels =  "%b %y") +
  labs(x = NULL, y = "Number of Hospitals",
       colour = NULL) +
  # geom_vline(xintercept = as.numeric(as.Date("2020-03-31")), 
             # color = "orange", 
             # linetype = 2,
             # lwd = .5, 
             # alpha = .5) +
  theme(text = element_text(size = 12))

# temp_ggplot

plotly::ggplotly(temp_ggplot,
                 tooltip = "text") %>%
  plotly::config(displayModeBar = FALSE)

Figure. Number of hospitals contributing monthly data.

NHSBSA dm+d

  • ADD DESCRIPTION (MILAN)

dm+d info

db_dmd_info <- dplyr::tbl(conn_ebm_scmd, sql_query_dmd_info)
# Define SQL query for Dictionary of Medicines and Devices (dm+d) information
# Rename some variables to match names across different queries (e.g., vmp_snomed_code)
sql_query_dmd_info <- dbplyr::sql("SELECT
                                   CAST(a.id AS STRING) AS vmp_snomed_code,
                                   a.nm AS vmp_product_name,
                                   a.vtm AS vtmid,
                                   j.nm AS vtmnm,
                                   b.form AS form_cd,
                                   c.descr AS form_descr,
                                   a.df_ind AS df_ind_cd,
                                   d.descr AS df_descr,
                                   a.udfs,
                                   e.descr AS udfs_descr,
                                   f.descr AS unit_dose_descr,
                                   g.strnt_nmrtr_val,
                                   h.descr AS strnt_nmrtr_uom,
                                   g.strnt_dnmtr_val,
                                   i.descr AS strnt_dnmtr_descr,
                                   a.bnf_code,
                                   k.presentation AS bnf_presentation
                                   
                                   FROM ebmdatalab.dmd.vmp AS a

                                   LEFT JOIN ebmdatalab.dmd.dform AS b
                                   ON a.id = b.vmp

                                   LEFT JOIN ebmdatalab.dmd.form AS c
                                   ON b.form = c.cd

                                   LEFT JOIN ebmdatalab.dmd.dfindicator AS d
                                   ON a.df_ind = d.cd

                                   LEFT JOIN ebmdatalab.dmd.unitofmeasure AS e
                                   ON a.udfs_uom = e.cd

                                   LEFT JOIN ebmdatalab.dmd.unitofmeasure AS f
                                   ON a.unit_dose_uom = f.cd

                                   LEFT JOIN ebmdatalab.dmd.vpi AS g
                                   ON a.id = g.vmp

                                   LEFT JOIN ebmdatalab.dmd.unitofmeasure AS h
                                   ON g.strnt_nmrtr_uom = h.cd

                                   LEFT JOIN ebmdatalab.dmd.unitofmeasure AS i
                                   ON g.strnt_dnmtr_uom = i.cd

                                   LEFT JOIN ebmdatalab.dmd.vtm AS j
                                   ON a.vtm = j.id

                                   LEFT JOIN ebmdatalab.hscic.bnf AS k
                                   ON a.bnf_code = k.presentation_code")
db_dmd_info_distinct <- db_dmd_info %>% 
  select(vmp_snomed_code, udfs, vtmid, form_cd, strnt_dnmtr_val, strnt_nmrtr_val, strnt_dnmtr_val, strnt_dnmtr_descr, bnf_code) %>% 
  distinct()

df_dmd_info_distinct <- db_dmd_info_distinct %>% 
  collect()

df_dmd_info_nmrt <- df_dmd_info_distinct %>% 
  drop_na(strnt_nmrtr_val) %>% 
  left_join(df_scmd_lookup) %>% 
  dplyr::relocate(vmp_product_name, vmp_snomed_code) %>% 
  arrange(vmp_snomed_code) %>% 
  mutate(vmp_product_name = fct_explicit_na(vmp_product_name)) 
  


reactable(df_dmd_info_nmrt,
          columns = list(
            udfs = reactable::colDef(show = FALSE),
            vtmid = reactable::colDef(show = FALSE),
            form_cd = reactable::colDef(show = FALSE),
            bnf_code = reactable::colDef(show = FALSE)
            ),
          style = list(fontSize = "12px"),
          highlight = TRUE,
          filterable = TRUE)

ddd

# get ddd data and make var names consistent
df_ddd <- read_csv(here("data/ddd_week492021.csv"), 
                   col_types = cols(VPID = col_character())) %>% 
  janitor::clean_names() %>% 
  rename(vmp_snomed_code = vpid)

reactable(df_ddd,
          style = list(fontSize = "12px"),
          highlight = TRUE,
          filterable = TRUE)

Combined dataset

  • The table below shows the total count (‘Total count’) of each product (‘SNOMED Name and Code’) and whether we have information about ddd (‘ddd available’)
  • You can filter the table by positive or negative count and availability of ddd (TRUE = “ddd info available”; FALSE = “product listed in dm+d dataset but no ddd info not available”; (Missing) = “product NOT listed in dm+d dataset, therefore no ddd info not available”)
df_ddd_avail <- df_ddd %>% 
  mutate(ddd_avail = factor(!is.na(ddd)),
         bnf_avail = factor(!is.na(bnf))) %>% 
  select(vmp_snomed_code, ddd_avail)
df_scmd_tab <- df_scmd_distinct %>% 
  left_join(df_ddd_avail) %>% 
  mutate(ddd_avail = fct_explicit_na(ddd_avail)) %>% 
  select(vmp_product_name, vmp_snomed_code, count, ddd_avail) %>% 
  mutate(positive_count = factor(count >= 0, levels = c(F, T),
                                 labels = c("Negative", "Positive")))
df_scmd_tab_shared <- SharedData$new(df_scmd_tab)

bscols(widths = c(3, 9),
       list(
         filter_checkbox("positive_count", "Count", 
                         df_scmd_tab_shared, ~positive_count,
                         inline = TRUE),
         filter_select("ddd_avail", "Filter ddd availability", 
                       df_scmd_tab_shared, ~ddd_avail)),
       reactable(df_scmd_tab_shared,
                 columns = list(
                   vmp_product_name = colDef(name = "Name",
                                             minWidth = 100),
                   vmp_snomed_code = colDef(name = "Code",
                                             minWidth = 50),
                   count = colDef(name = "Total count",
                                             minWidth = 30,
                                  format = colFormat(digits = 0)),
                   ddd_avail = colDef(name = "ddd available",
                                             minWidth = 30),
                   positive_count = reactable::colDef(show = FALSE)
                 ),
                 columnGroups = list(
                   colGroup(name = "SNOMED", 
                            columns = c("vmp_product_name", "vmp_snomed_code"))
                 ),
                 style = list(fontSize = "12px"),
                 highlight = TRUE,
                 filterable = TRUE,
                 defaultSorted = list(count = "desc")))